﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Text.RegularExpressions;

namespace plagiat_tp
{
    /// <summary>
    /// Brings free text to a canonical form for comparison: separator
    /// characters and stop words are blanked out and every run of whitespace
    /// is collapsed to a single space.
    /// </summary>
    class Canonize
    {
        // Compiled once and shared: Regex instances are immutable and thread-safe.
        private static readonly Regex WhitespaceRun = new Regex("\\s+");

        // Separator characters that are replaced by a space:
        // . < > ! ? : ; - \ + = ( )
        // Line breaks need no entry here; they are whitespace and are
        // collapsed by WhitespaceRun afterwards.
        private static readonly Regex Separators = new Regex(@"[.<>!?:;\-\\+=()]");

        // Stop words are matched as whole words only, so that longer words
        // merely containing them (e.g. "также", "какой") stay intact.
        // \b is Unicode-aware in .NET, so it works for Cyrillic text.
        // Case is ignored so that a capitalised "Это" is removed as well.
        private static readonly Regex StopWords = new Regex(
            @"\b(?:это|как|так)\b",
            RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

        /// <summary>
        /// Collapses every run of whitespace (spaces, tabs, line breaks) into
        /// a single space. Leading/trailing whitespace becomes one space; it
        /// is not trimmed.
        /// </summary>
        /// <param name="text">Text to normalise; must not be null.</param>
        /// <returns>The text with whitespace runs collapsed.</returns>
        private static string ClearSpaces(string text)
        {
            return WhitespaceRun.Replace(text, " ");
        }

        /// <summary>
        /// Replaces each separator character and each whole-word stop word
        /// with a single space.
        /// </summary>
        /// <param name="text">Text to clean; must not be null.</param>
        /// <returns>The text with separators and stop words blanked out.</returns>
        private static string ClearText(string text)
        {
            // Separators go first so that a stop word glued to punctuation
            // ("так!") is surrounded by spaces before the word pass runs.
            string withoutSeparators = Separators.Replace(text, " ");
            return StopWords.Replace(withoutSeparators, " ");
        }

        /// <summary>
        /// Produces the canonical form of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Source text; null is treated as empty.</param>
        /// <returns>
        /// The text without separators and stop words and with whitespace runs
        /// collapsed to single spaces; an empty string for null or empty input.
        /// </returns>
        public string CanonText(string text)
        {
            if (string.IsNullOrEmpty(text))
            {
                return string.Empty;
            }

            return ClearSpaces(ClearText(text));
        }
    }
}
